MUHAMMAD ARIENAL HAQ - PORTFOLIO
# Import necessary libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.subplots as sp
# Import US fortune 1000 companies 2022 CSV file
fortune_df = pd.read_csv('E:\\Data Analyst Mastery\\Fortune Top 1000 Companies Analysis\\fortune 1000 companies in 2022 - 2022.csv')
fortune_df.head(10)
| rank in 2022 | Name | Revenue | revenue(% change) | profits in millions | profits % change\r | assets | market value | change in rank of top 1000 companies | employees | change in rank(500 only)\r\n | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Walmart | $572,754 | 2.40% | $13,673 | 1.20% | $244,860 | $409,795 | - | 2,300,000 | - |
| 1 | 2 | Amazon | $469,822 | 21.70% | $33,364 | 56.40% | $420,549 | $1,658,807.30 | - | 1,608,000 | - |
| 2 | 3 | Apple | $365,817 | 33.30% | $94,680 | 64.90% | $351,002 | $2,849,537.60 | - | 154,000 | - |
| 3 | 4 | CVS Health | $292,111 | 8.70% | $7,910 | 10.20% | $232,999 | $132,839.20 | - | 258,000 | - |
| 4 | 5 | UnitedHealth Group | $287,597 | 11.80% | $17,285 | 12.20% | $212,206 | $479,830.30 | - | 350,000 | - |
| 5 | 6 | Exxon Mobil | $285,640 | 57.40% | $23,040 | - | $338,923 | $349,652.40 | 4 | 63,000 | 4 |
| 6 | 7 | Berkshire Hathaway | $276,094 | 12.50% | $89,795 | 111.20% | $958,784 | $779,542.30 | -1 | 372,000 | -1 |
| 7 | 8 | Alphabet | $257,637 | 41.20% | $76,033 | 88.80% | $359,268 | $1,842,326.10 | 1 | 156,500 | 1 |
| 8 | 9 | McKesson | $238,228 | 3.10% | -$4,539 | -604.30% | $65,015 | $45,857.80 | -2 | 67,500 | -2 |
| 9 | 10 | AmerisourceBergen | $213,988.80 | 12.70% | $1,539.90 | - | $57,337.80 | $32,355.70 | -2 | 40,000 | -2 |
# Create new dataframe for top 100 companies
companies_df = fortune_df.iloc[:100,:]
# Check the information on the dataframe
companies_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 rank in 2022 100 non-null object
1 Name 100 non-null object
2 Revenue 100 non-null object
3 revenue(% change) 100 non-null object
4 profits in millions 100 non-null object
100 non-null object
6 assets 100 non-null object
7 market value 100 non-null object
8 change in rank of top 1000 companies 100 non-null object
9 employees 100 non-null object
10 change in rank(500 only)
100 non-null object
dtypes: object(11)
memory usage: 8.7+ KB
# Delete unneeded columns, because there are 2 identical columns
del companies_df['change in rank(500 only)\r\n']
companies_df.head(10)
| rank in 2022 | Name | Revenue | revenue(% change) | profits in millions | profits % change\r | assets | market value | change in rank of top 1000 companies | employees | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Walmart | $572,754 | 2.40% | $13,673 | 1.20% | $244,860 | $409,795 | - | 2,300,000 |
| 1 | 2 | Amazon | $469,822 | 21.70% | $33,364 | 56.40% | $420,549 | $1,658,807.30 | - | 1,608,000 |
| 2 | 3 | Apple | $365,817 | 33.30% | $94,680 | 64.90% | $351,002 | $2,849,537.60 | - | 154,000 |
| 3 | 4 | CVS Health | $292,111 | 8.70% | $7,910 | 10.20% | $232,999 | $132,839.20 | - | 258,000 |
| 4 | 5 | UnitedHealth Group | $287,597 | 11.80% | $17,285 | 12.20% | $212,206 | $479,830.30 | - | 350,000 |
| 5 | 6 | Exxon Mobil | $285,640 | 57.40% | $23,040 | - | $338,923 | $349,652.40 | 4 | 63,000 |
| 6 | 7 | Berkshire Hathaway | $276,094 | 12.50% | $89,795 | 111.20% | $958,784 | $779,542.30 | -1 | 372,000 |
| 7 | 8 | Alphabet | $257,637 | 41.20% | $76,033 | 88.80% | $359,268 | $1,842,326.10 | 1 | 156,500 |
| 8 | 9 | McKesson | $238,228 | 3.10% | -$4,539 | -604.30% | $65,015 | $45,857.80 | -2 | 67,500 |
| 9 | 10 | AmerisourceBergen | $213,988.80 | 12.70% | $1,539.90 | - | $57,337.80 | $32,355.70 | -2 | 40,000 |
# Rename each header column
companies_df = companies_df.rename({'rank in 2022':'Rank','Revenue':'Revenue (Million USD)','revenue(% change)':'Revenue Change (%)','profits in millions':'Profits (Million USD)','profits % change\r':'Profits Change (%)','assets':'Assets (Million USD)','market value':'Market Value (Million USD)','change in rank of top 1000 companies':'Rank Change','employees':'Employees'}, axis=1)
companies_df.head(10)
| Rank | Name | Revenue (Million USD) | Revenue Change (%) | Profits (Million USD) | Profits Change (%) | Assets (Million USD) | Market Value (Million USD) | Rank Change | Employees | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Walmart | $572,754 | 2.40% | $13,673 | 1.20% | $244,860 | $409,795 | - | 2,300,000 |
| 1 | 2 | Amazon | $469,822 | 21.70% | $33,364 | 56.40% | $420,549 | $1,658,807.30 | - | 1,608,000 |
| 2 | 3 | Apple | $365,817 | 33.30% | $94,680 | 64.90% | $351,002 | $2,849,537.60 | - | 154,000 |
| 3 | 4 | CVS Health | $292,111 | 8.70% | $7,910 | 10.20% | $232,999 | $132,839.20 | - | 258,000 |
| 4 | 5 | UnitedHealth Group | $287,597 | 11.80% | $17,285 | 12.20% | $212,206 | $479,830.30 | - | 350,000 |
| 5 | 6 | Exxon Mobil | $285,640 | 57.40% | $23,040 | - | $338,923 | $349,652.40 | 4 | 63,000 |
| 6 | 7 | Berkshire Hathaway | $276,094 | 12.50% | $89,795 | 111.20% | $958,784 | $779,542.30 | -1 | 372,000 |
| 7 | 8 | Alphabet | $257,637 | 41.20% | $76,033 | 88.80% | $359,268 | $1,842,326.10 | 1 | 156,500 |
| 8 | 9 | McKesson | $238,228 | 3.10% | -$4,539 | -604.30% | $65,015 | $45,857.80 | -2 | 67,500 |
| 9 | 10 | AmerisourceBergen | $213,988.80 | 12.70% | $1,539.90 | - | $57,337.80 | $32,355.70 | -2 | 40,000 |
# Find the string '-' in every column, except the negative sign
mask = np.column_stack([companies_df[col].astype(str).str.contains('(?:\s|^)-(?:\s|$)') for col in companies_df])
companies_df.loc[mask.any(axis=1)]
| Rank | Name | Revenue (Million USD) | Revenue Change (%) | Profits (Million USD) | Profits Change (%) | Assets (Million USD) | Market Value (Million USD) | Rank Change | Employees | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Walmart | $572,754 | 2.40% | $13,673 | 1.20% | $244,860 | $409,795 | - | 2,300,000 |
| 1 | 2 | Amazon | $469,822 | 21.70% | $33,364 | 56.40% | $420,549 | $1,658,807.30 | - | 1,608,000 |
| 2 | 3 | Apple | $365,817 | 33.30% | $94,680 | 64.90% | $351,002 | $2,849,537.60 | - | 154,000 |
| 3 | 4 | CVS Health | $292,111 | 8.70% | $7,910 | 10.20% | $232,999 | $132,839.20 | - | 258,000 |
| 4 | 5 | UnitedHealth Group | $287,597 | 11.80% | $17,285 | 12.20% | $212,206 | $479,830.30 | - | 350,000 |
| 5 | 6 | Exxon Mobil | $285,640 | 57.40% | $23,040 | - | $338,923 | $349,652.40 | 4 | 63,000 |
| 9 | 10 | AmerisourceBergen | $213,988.80 | 12.70% | $1,539.90 | - | $57,337.80 | $32,355.70 | -2 | 40,000 |
| 12 | 13 | AT&T | $168,864 | -1.70% | $20,081 | - | $551,622 | $169,262.40 | -2 | 202,600 |
| 14 | 15 | Cardinal Health | $162,467 | 6.20% | $611 | - | $44,453 | $15,709.40 | -1 | 46,827 |
| 15 | 16 | Chevron | $162,465 | 71.60% | $15,625 | - | $239,535 | $317,120.10 | 11 | 42,595 |
| 18 | 19 | Marathon Petroleum | $141,032 | 58.50% | $9,738 | - | $85,373 | $47,758.10 | 13 | 17,700 |
| 21 | 22 | Ford Motor | $136,341 | 7.20% | $17,937 | - | $257,035 | $67,958.20 | -1 | 183,000 |
| 28 | 29 | Phillips 66 | $114,852 | 75.40% | $1,317 | - | $55,594 | $41,561 | 19 | 14,000 |
| 29 | 30 | Valero Energy | $108,332 | 80.20% | $930 | - | $57,888 | $41,572.20 | 23 | 9,804 |
| 41 | 42 | State Farm Insurance | $82,224.70 | 4.20% | $1,280.90 | -65.70% | $325,349.30 | - | -3 | 53,586 |
| 50 | 51 | Prudential Financial | $70,934 | 24.40% | $7,724 | - | $937,582 | $44,482.20 | 4 | 40,916 |
| 51 | 52 | Albertsons | $69,690.40 | 11.60% | $850.20 | 82.30% | $26,598 | $16,063.70 | - | 300,000 |
| 52 | 53 | Walt Disney | $67,418 | 3.10% | $1,995 | - | $203,609 | $249,718.10 | -3 | 171,000 |
| 53 | 54 | Energy Transfer | $67,417 | 73.10% | $5,470 | - | $105,963 | $34,496.90 | 27 | 12,558 |
| 57 | 58 | Raytheon Technologies | $64,388 | 13.80% | $3,864 | - | $161,404 | $147,640.90 | -1 | 174,000 |
| 59 | 60 | Boeing | $62,286 | 7.10% | -$4,202 | - | $138,552 | $113,058.70 | -6 | 142,000 |
| 60 | 61 | Morgan Stanley | $61,121 | 17.40% | $15,034 | 36.70% | $1,188,140 | $155,685.60 | - | 74,814 |
| 61 | 62 | HCA Healthcare | $58,752 | 14% | $6,956 | 85.30% | $50,742 | $75,691.90 | - | 244,000 |
| 66 | 67 | AIG | $52,057 | 19% | $9,388 | - | $596,112 | $50,608.20 | 5 | 36,600 |
| 71 | 72 | New York Life Insurance | $51,198.50 | 9.60% | $277.10 | - | $379,984.60 | - | -5 | 14,344 |
| 75 | 76 | Publix Super Markets | $48,393.90 | 7.10% | $4,412.20 | 11.10% | $31,524.30 | - | -7 | 232,000 |
| 76 | 77 | ConocoPhillips | $48,349 | 151.10% | $8,079 | - | $90,661 | $129,605.10 | 79 | 9,900 |
| 77 | 78 | Liberty Mutual Insurance Group | $48,200 | 10.10% | $3,068 | 304.70% | $156,043 | - | -7 | 45,000 |
| 79 | 80 | Nationwide | $47,376 | 13% | $1,617.20 | - | $278,412.20 | - | -4 | 24,134 |
| 81 | 82 | Bristol-Myers Squibb | $46,385 | 9.10% | $6,994 | - | $109,314 | $155,203.50 | -7 | 32,200 |
| 87 | 88 | Plains GP Holdings | $42,078 | 80.70% | $60 | - | $29,978 | $2,242.90 | 39 | 4,100 |
| 89 | 90 | TIAA | $40,526.40 | -2.60% | $4,060.70 | 627.60% | $698,190.80 | - | -11 | 15,065 |
| 92 | 93 | Coca-Cola | $38,655 | 17.10% | $9,771 | 26.10% | $94,354 | $268,769.70 | - | 79,000 |
| 94 | 95 | CHS | $38,448 | 35.40% | $554 | 31.10% | $17,576.30 | - | 8 | 9,941 |
| 95 | 96 | USAA | $37,469.60 | 3.20% | $3,300 | -15.50% | $210,676.50 | - | -9 | 37,335 |
| 96 | 97 | Northwestern Mutual | $36,751.20 | 8.80% | $977.80 | 130.10% | $334,653.80 | - | -7 | 7,585 |
| 99 | 100 | Massachusetts Mutual Life Insurance | $35,899.50 | 51.70% | $319.30 | - | $383,292.40 | - | 23 | 10,052 |
# Replaces the string '-' in each column with 0, except the negative sign
companies_df['Revenue Change (%)'] = companies_df['Revenue Change (%)'].str.replace('(?:\s|^)-(?:\s|$)', '0', regex=True)
companies_df['Profits (Million USD)'] = companies_df['Profits (Million USD)'].str.replace('(?:\s|^)-(?:\s|$)', '0', regex=True)
companies_df['Profits Change (%)'] = companies_df['Profits Change (%)'].str.replace('(?:\s|^)-(?:\s|$)', '0', regex=True)
companies_df['Market Value (Million USD)'] = companies_df['Market Value (Million USD)'].str.replace('(?:\s|^)-(?:\s|$)', '0', regex=True)
companies_df['Rank Change'] = companies_df['Rank Change'].str.replace('(?:\s|^)-(?:\s|$)', '0', regex=True)
companies_df['Employees'] = companies_df['Employees'].str.replace('(?:\s|^)-(?:\s|$)', '0', regex=True)
companies_df.head(10)
| Rank | Name | Revenue (Million USD) | Revenue Change (%) | Profits (Million USD) | Profits Change (%) | Assets (Million USD) | Market Value (Million USD) | Rank Change | Employees | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Walmart | $572,754 | 2.40% | $13,673 | 1.20% | $244,860 | $409,795 | 0 | 2,300,000 |
| 1 | 2 | Amazon | $469,822 | 21.70% | $33,364 | 56.40% | $420,549 | $1,658,807.30 | 0 | 1,608,000 |
| 2 | 3 | Apple | $365,817 | 33.30% | $94,680 | 64.90% | $351,002 | $2,849,537.60 | 0 | 154,000 |
| 3 | 4 | CVS Health | $292,111 | 8.70% | $7,910 | 10.20% | $232,999 | $132,839.20 | 0 | 258,000 |
| 4 | 5 | UnitedHealth Group | $287,597 | 11.80% | $17,285 | 12.20% | $212,206 | $479,830.30 | 0 | 350,000 |
| 5 | 6 | Exxon Mobil | $285,640 | 57.40% | $23,040 | 0 | $338,923 | $349,652.40 | 4 | 63,000 |
| 6 | 7 | Berkshire Hathaway | $276,094 | 12.50% | $89,795 | 111.20% | $958,784 | $779,542.30 | -1 | 372,000 |
| 7 | 8 | Alphabet | $257,637 | 41.20% | $76,033 | 88.80% | $359,268 | $1,842,326.10 | 1 | 156,500 |
| 8 | 9 | McKesson | $238,228 | 3.10% | -$4,539 | -604.30% | $65,015 | $45,857.80 | -2 | 67,500 |
| 9 | 10 | AmerisourceBergen | $213,988.80 | 12.70% | $1,539.90 | 0 | $57,337.80 | $32,355.70 | -2 | 40,000 |
# Remove the strings '$' and ',' on each column
companies_df['Revenue (Million USD)'] = companies_df['Revenue (Million USD)'].str.replace('[$,]', '', regex=True)
companies_df['Revenue Change (%)'] = companies_df['Revenue Change (%)'].str.replace('[%]', '', regex=True)
companies_df['Profits (Million USD)'] = companies_df['Profits (Million USD)'].str.replace('[$,]', '', regex=True)
companies_df['Profits Change (%)'] = companies_df['Profits Change (%)'].str.replace('[%]', '', regex=True)
companies_df['Assets (Million USD)'] = companies_df['Assets (Million USD)'].str.replace('[$,]', '', regex=True)
companies_df['Market Value (Million USD)'] = companies_df['Market Value (Million USD)'].str.replace('[$,]', '', regex=True)
companies_df['Employees'] = companies_df['Employees'].str.replace('[,]', '', regex=True)
companies_df.head(10)
| Rank | Name | Revenue (Million USD) | Revenue Change (%) | Profits (Million USD) | Profits Change (%) | Assets (Million USD) | Market Value (Million USD) | Rank Change | Employees | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Walmart | 572754 | 2.40 | 13673 | 1.20 | 244860 | 409795 | 0 | 2300000 |
| 1 | 2 | Amazon | 469822 | 21.70 | 33364 | 56.40 | 420549 | 1658807.30 | 0 | 1608000 |
| 2 | 3 | Apple | 365817 | 33.30 | 94680 | 64.90 | 351002 | 2849537.60 | 0 | 154000 |
| 3 | 4 | CVS Health | 292111 | 8.70 | 7910 | 10.20 | 232999 | 132839.20 | 0 | 258000 |
| 4 | 5 | UnitedHealth Group | 287597 | 11.80 | 17285 | 12.20 | 212206 | 479830.30 | 0 | 350000 |
| 5 | 6 | Exxon Mobil | 285640 | 57.40 | 23040 | 0 | 338923 | 349652.40 | 4 | 63000 |
| 6 | 7 | Berkshire Hathaway | 276094 | 12.50 | 89795 | 111.20 | 958784 | 779542.30 | -1 | 372000 |
| 7 | 8 | Alphabet | 257637 | 41.20 | 76033 | 88.80 | 359268 | 1842326.10 | 1 | 156500 |
| 8 | 9 | McKesson | 238228 | 3.10 | -4539 | -604.30 | 65015 | 45857.80 | -2 | 67500 |
| 9 | 10 | AmerisourceBergen | 213988.80 | 12.70 | 1539.90 | 0 | 57337.80 | 32355.70 | -2 | 40000 |
# Change the data type of each column as needed
convert = {'Rank' : int, 'Revenue (Million USD)' : float, 'Revenue Change (%)' : float, 'Profits (Million USD)' : float, 'Profits Change (%)' : float, 'Assets (Million USD)' : float, 'Market Value (Million USD)' : float, 'Rank Change' : int, 'Employees' : int}
companies_df = companies_df.astype(convert)
# Check the data type of each column
companies_df.dtypes
Rank int32 Name object Revenue (Million USD) float64 Revenue Change (%) float64 Profits (Million USD) float64 Profits Change (%) float64 Assets (Million USD) float64 Market Value (Million USD) float64 Rank Change int32 Employees int32 dtype: object
# Check for the presence of null from each column
companies_df.isna().sum()
Rank 0 Name 0 Revenue (Million USD) 0 Revenue Change (%) 0 Profits (Million USD) 0 Profits Change (%) 0 Assets (Million USD) 0 Market Value (Million USD) 0 Rank Change 0 Employees 0 dtype: int64
# Export DataFrame to Excel and CSV file
output_file_path_excel = 'E:\\Data Analyst Mastery\\Fortune Top 1000 Companies Analysis\\usFortuneTop100Companies2022Cleaned.xlsx'
output_file_path_csv = 'E:\\Data Analyst Mastery\\Fortune Top 1000 Companies Analysis\\usFortuneTop100Companies2022Cleaned.csv'
companies_df.to_excel(output_file_path_excel, index=False)
companies_df.to_csv(output_file_path_csv, sep='$', index=False)
# Visualize Top 10 Based On Revenue and All Companies Revenue Change
n = 10
fig1 = px.bar(
companies_df.nlargest(n, columns = 'Revenue (Million USD)'),
x = 'Name',
y = 'Revenue (Million USD)',
color = 'Name',
color_discrete_sequence = px.colors.sequential.thermal
)
fig2 = px.violin(
companies_df,
y ='Revenue Change (%)',
box = True,
points = 'all'
)
fig1_traces = []
fig2_traces = []
for trace in range(len(fig1['data'])):
fig1_traces.append(fig1['data'][trace])
for trace in range(len(fig2['data'])):
fig2_traces.append(fig2['data'][trace])
this_fig = sp.make_subplots(
rows = 1,
cols = 2,
subplot_titles = ['<b>Top 10 Based On Revenue (Million USD)</b>', '<b>All Companies Revenue Change (%)</b>']
)
this_fig.update_layout(
height = 500,
width = 1000,
font_family = 'verdana',
showlegend = False,
barmode = 'stack',
plot_bgcolor = 'rgb(245,245,245)'
)
for traces in fig1_traces:
this_fig.append_trace(traces, row = 1, col = 1)
for traces in fig2_traces:
this_fig.append_trace(traces, row = 1, col = 2)
this_fig.update_xaxes(fixedrange = True, tickangle = -45)
this_fig.update_yaxes(fixedrange = True, ticksuffix = ' ')
this_fig.show()
# Visualize Top 10 Based On Profits and All Companies Profits Change
fig1 = px.bar(
companies_df.nlargest(n, columns = 'Profits (Million USD)'),
x = 'Name',
y = 'Profits (Million USD)',
color = 'Name',
color_discrete_sequence = px.colors.sequential.thermal
)
fig2 = px.violin(
companies_df,
y = 'Profits Change (%)',
box = True,
points = 'all'
)
fig1_traces = []
fig2_traces = []
for trace in range(len(fig1['data'])):
fig1_traces.append(fig1['data'][trace])
for trace in range(len(fig2['data'])):
fig2_traces.append(fig2['data'][trace])
this_fig = sp.make_subplots(
rows = 1,
cols = 2,
subplot_titles = ['<b>Top 10 Based On Profits (Million USD)</b>', '<b>All Companies Profits Change (%)</b>']
)
this_fig.update_layout(
height = 500,
width = 1000,
font_family='verdana',
showlegend=False,
barmode='stack',
plot_bgcolor='rgb(245,245,245)'
)
for traces in fig1_traces:
this_fig.append_trace(traces, row = 1, col = 1)
for traces in fig2_traces:
this_fig.append_trace(traces, row = 1, col = 2)
this_fig.update_xaxes(fixedrange = True, tickangle = -45)
this_fig.update_yaxes(fixedrange = True, ticksuffix = ' ')
this_fig.show()
# Visualize Top 10 Based On Assets
fig = px.bar(
companies_df.nlargest(n, columns = 'Assets (Million USD)'),
x = 'Name',
y = 'Assets (Million USD)',
barmode = 'relative',
color = 'Name',
color_discrete_sequence = px.colors.sequential.thermal,
height = 500,
width = 550,
title = '<b>Top 10 Companies Based On Assets</b>'
)
fig.update_layout(
font_family = 'verdana',
title_font_size = 20,
title_x = 0.5,
xaxis_title = None,
yaxis_title = None,
showlegend = False,
plot_bgcolor = 'rgb(245,245,245)'
)
fig.update_xaxes(fixedrange = True, tickangle = -45)
fig.update_yaxes(fixedrange = True, ticksuffix = ' ')
fig.show()
# Visualize Top 10 Based On Market Value
fig = px.bar(
companies_df.nlargest(n, columns = 'Market Value (Million USD)'),
x = 'Name',
y = 'Market Value (Million USD)',
barmode = 'relative',
color = 'Name',
color_discrete_sequence = px.colors.sequential.thermal,
height = 500,
width = 550,
title = '<b>Top 10 Companies Based On Market Value</b>'
)
fig.update_layout(
font_family = 'verdana',
title_font_size = 20,
title_x = 0.5,
xaxis_title = None,
yaxis_title = None,
showlegend = False,
plot_bgcolor = 'rgb(245,245,245)')
fig.update_xaxes(fixedrange = True, tickangle = -45)
fig.update_yaxes(fixedrange = True, ticksuffix = ' ')
fig.show()
# Visualize Top 10 Based On Employees
fig = px.bar(
companies_df.nlargest(n, columns = 'Employees'),
x = 'Name',
y = 'Employees',
barmode = 'relative',
color = 'Name',
color_discrete_sequence = px.colors.sequential.thermal,
height = 500,
width = 550,
title = '<b>Top 10 Companies Based On Employees</b>',
)
fig.update_layout(
font_family = 'verdana',
title_font_size = 20,
title_x = 0.5,
xaxis_title = None,
yaxis_title = None,
showlegend = False,
plot_bgcolor = 'rgb(245,245,245)')
fig.update_xaxes(fixedrange = True, tickangle = -45)
fig.update_yaxes(fixedrange = True, ticksuffix = ' ')
fig.show()
# Visualize Relationship Between Rank and Performance
fig1 = px.scatter(
companies_df,
x = 'Rank',
y = 'Revenue (Million USD)',
color = 'Revenue (Million USD)',
hover_data = ['Name'],
trendline = 'ols'
)
fig2 = px.scatter(
companies_df,
x = 'Rank',
y = 'Profits (Million USD)',
color = 'Profits (Million USD)',
hover_data = ['Name'],
trendline = 'ols'
)
fig1_traces = []
fig2_traces = []
for trace in range(len(fig1['data'])):
fig1_traces.append(fig1['data'][trace])
for trace in range(len(fig2['data'])):
fig2_traces.append(fig2['data'][trace])
this_fig = sp.make_subplots(
rows = 1,
cols = 2,
subplot_titles = ['<b>Rank and Revenue</b>', '<b>Rank and Profits</b>']
)
this_fig.update_layout(
height = 500,
width = 1000,
font_family = 'verdana',
showlegend = False,
title = '<b>Relationship Between Rank and Performance<b>',
title_font_size = 20,
title_x = 0.5,
coloraxis_autocolorscale = False,
coloraxis_colorscale = ['rgb(0, 63, 92)', 'rgb(88, 80, 141)', 'rgb(188, 80, 144)', 'rgb(255, 99, 97)', 'rgb(255, 166, 0)'],
plot_bgcolor = 'rgb(245,245,245)'
)
for traces in fig1_traces:
this_fig.append_trace(traces, row = 1, col = 1)
for traces in fig2_traces:
this_fig.append_trace(traces, row = 1, col = 2)
this_fig.update_xaxes(fixedrange = True, tickangle = -45)
this_fig.update_yaxes(fixedrange = True, ticksuffix = ' ', type = 'log')
this_fig['layout']['xaxis']['title'] = 'Rank'
this_fig['layout']['xaxis2']['title'] = 'Rank'
this_fig.show()
col1, col2 = 'Rank', 'Revenue (Million USD)'
corr = companies_df[col1].corr(companies_df[col2])
print('1. Correlation between Rank and Revenue: ', round(corr, 2))
col1, col2 = 'Rank', 'Profits (Million USD)'
corr = companies_df[col1].corr(companies_df[col2])
print('2. Correlation between Rank and Profits: ', round(corr, 2))
1. Correlation between Rank and Revenue: -0.77 2. Correlation between Rank and Profits: -0.44
# Visualize Relationship Between Rank and Values
fig1 = px.scatter(
companies_df,
x = 'Rank',
y = 'Assets (Million USD)',
color = 'Assets (Million USD)',
hover_data = ['Name'],
trendline = 'ols',
)
fig2 = px.scatter(
companies_df,
x = 'Rank',
y = 'Market Value (Million USD)',
color = 'Market Value (Million USD)',
hover_data = ['Name'],
trendline = 'ols',
)
fig1_traces = []
fig2_traces = []
for trace in range(len(fig1['data'])):
fig1_traces.append(fig1['data'][trace])
for trace in range(len(fig2['data'])):
fig2_traces.append(fig2['data'][trace])
this_fig = sp.make_subplots(
rows = 1,
cols = 2,
subplot_titles = ['<b>Rank and Assets</b>', '<b>Rank and Market Value</b>']
)
this_fig.update_layout(
height = 500,
width = 1000,
font_family = 'verdana',
showlegend = False,
title = '<b>Relationship Between Rank and Values<b>',
title_font_size = 20,
title_x = 0.5,
coloraxis_autocolorscale = False,
coloraxis_colorscale = ['rgb(0, 63, 92)', 'rgb(88, 80, 141)', 'rgb(188, 80, 144)', 'rgb(255, 99, 97)', 'rgb(255, 166, 0)'],
plot_bgcolor = 'rgb(245,245,245)'
)
for traces in fig1_traces:
this_fig.append_trace(traces, row = 1, col = 1)
for traces in fig2_traces:
this_fig.append_trace(traces, row = 1, col = 2)
this_fig.update_xaxes(fixedrange = True, tickangle = -45)
this_fig.update_yaxes(fixedrange = True, ticksuffix = ' ', type='log')
this_fig['layout']['xaxis']['title']='Rank'
this_fig['layout']['xaxis2']['title']='Rank'
this_fig.show()
col1, col2 = 'Rank', 'Assets (Million USD)'
corr = companies_df[col1].corr(companies_df[col2])
print('1. Correlation between Rank and Revenue: ', round(corr, 2))
col1, col2 = 'Rank', 'Market Value (Million USD)'
corr = companies_df[col1].corr(companies_df[col2])
print('2. Correlation between Rank and Profits: ', round(corr, 2))
1. Correlation between Rank and Revenue: -0.14 2. Correlation between Rank and Profits: -0.38